%% Copyright: National ICT Australia, 2005-2006
%% Author   : Alexander Smola
%%            Choon Hui Teo
%%            Le Song
%% License  : This file is licensed under the GNU Public License. See
%%            www.gnu.org for details on the license.

\documentclass[a0, landscape]{nictalaymanlandscape}

\usepackage{mdwlist}
\usepackage{shadow}
\usepackage{times}
\usepackage{algorithmic}
\usepackage{moreverb}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{graphicx}
\usepackage{subfig}
\usepackage{multirow}
\usepackage{fancybox}
\usepackage{pifont}

\graphicspath{{./}{./eps/}}

\newcommand{\casemax}{\mathrm{casemax}}
\newcommand{\casemin}{\mathrm{casemin}}
\newcommand{\UB}{\mathit{UB}}
\newcommand{\LB}{\mathit{LB}}
\newcommand{\IND}{\mathit{Ind}}
\newcommand{\CONS}{\mathit{Cons}}
\newcommand{\Root}{\mathit{Root}}
\newcommand{\Max}{\mathit{Max}}
\newcommand{\sq}{\hspace{-1mm}}
\newcommand{\sqm}{\hspace{-2mm}}

%------------------------------------------------------------
% number of columns in the poster:
%------------------------------------------------------------

%\columnfrac{0.3}% three columns
\columnfrac{0.32}% three columns

%%%%%%%%%%%%%%% local definitions %%%%%%%%%%%%%%%%%%%%%%%%%

\newcommand{\bl}[1]{{\color{nictablue}#1}}

%------------------------------------------------------------
% Now we start our poster:

%------------------------------------------------------------

%%%%%%%%% define headers %%%%%%%%%%%%
\title{Symbolic Dynamic Programming \\for Continuous State and Observation POMDPs}
%\staff{Authors}
\authors{Zahra Zamani (NICTA \& the ANU, Australia), Scott Sanner (NICTA \& the ANU, Australia), \\ Pascal Poupart (University of Waterloo, Canada), Kristian Kersting (Fraunhofer IAIS, University of Bonn, Germany) %[0.2in]
}


\content{

\begin{pcolumn}
\section{Highlight}
{\textbf{\color{red} Goal:}} & 1: Exact Value Iteration for continuous state POMDPs\\
& 2: Exact point-based backup for continuous state and observation POMDPs\\
\vspace{-1.5em}
\begin{center}
 \shadowbox{
 \begin{minipage}{0.8\columnwidth}
{\color{nictablue} 
\vspace{-.35em} 
\begin{center} 
\begin{align}
V^h(\vec{b}) &= \max\nolimits_{\alpha^h_i \in \Gamma^h} \alpha^h_i \cdot \vec{b} \label{eq:vfun} \\
g^h_{a,o,j} &=  \int_{x_{s'}} \sum_{d_{s'}} p(o|xd'_s,a)p(xd'_s|xd_s,a) \alpha^{h-1}_j(xd'_s) ; \hspace{2mm}  \forall \alpha^{h-1}_{j} \in \Gamma^{h-1} \label{eq:backup} \\
\Gamma^{h}_a   &= r_a + \gamma \textrm{\large $\boxplus$}_{o \in \mathcal{O}} \left\{ g^h_{a,o,j}(\cdot) \right\}_j  \label{eq:cross_prod}, \hspace{5mm} 
\Gamma^h  = \bigcup_a \Gamma^h_a 
\end{align}
\end{center}\vspace{-0.5em} }
 \end{minipage}
}
\end{center}
\begin{tabular}{ll}
{\textbf{\color{red} Tools:}}
& 1: Symbolic Dynamic Programming approach(SDP)\\
& 2: Efficient data-structure of Extended ADDs (XADDs)
\end{tabular}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{DC-POMDPs}
\vspace{-1.85em}
\begin{itemize}
\item { \color{red} Discrete and Continuous State Space} : $(\vec{d_s},\vec{x_s})$ where $d_{s_i} \in \{ 0,1 \}$ and $x_{s_j} \in \mathbb{R}$. %($1\leq i \leq n, 1 \leq j \leq m$).
\item { \color{red} Discrete and Continuous Opservation Space} : $(\vec{d_o},\vec{x_o})$ where $d_{o_i} \in \{ 0,1 \}$ and $x_{o_j} \in \mathbb{R}$.% ($1\leq i \leq p, 1 \leq j \leq q$).
\item {\color{red} Discrete Action Space} : $A = \{a_1, \ldots, a_k) \}$
\item {\color{red}Transition Model} : Joint DBN of Conditional Probability Functions (CPF) and Piecewise Linear Equations (PLE)
\item {\color{red}Reward Model} : Piecewise Linear function
{%\footnotesize
\begin{equation}
P(\vec{d_s}',\vec{x_s}'|\vec{d_s},\vec{x_s},a) = 
\prod_{i=1}^n P(d_{s_i}'|\vec{d_s},\vec{x_s},a) \prod_{j=1}^m P(x_{s_j}'|\vec{d_s},\vec{d_s}',\vec{x_s},a). \nonumber
\end{equation}}
\begin{align} 
P(t_s'|\vec{t_s},a)= \delta\left[ t_s' - 
\begin{cases}
 (a=\open) &: t_s - 5 \\ 
(a \neq \open) &: t_s + 7 \\
\end{cases}
\right]
\hspace{2mm}
R(t_s,a) = 
\begin{cases}
 (a=\open) &: -1 \\
(a \neq \open)\wedge (t_s>T) &: -1000 \\
(a \neq \open)\wedge \neg(t_s>T) &: 100 \\
\end{cases}\label{eq:transR} 
\end{align} 
\item {\color{red}Observation Model} : Joint DBN of Conditional Probability Functions (CPF) and Piecewise Linear Equations (PLE):
{%\footnotesize
\begin{equation}
P(\vec{d_o},\vec{x_o}|\vec{d_s},\vec{x_s},a) = 
\prod_{i=1}^n P(d_{o_i}|d_{s_i},a) \prod_{j=1}^m P(x_{o_j}|x_{s_j},a).\nonumber 
\end{equation}} 
\begin{align} 
P(d_o=1|t_s',\open) = 
\begin{cases}
  t_s' <= 15 : 0.9 \\
 t_s' > 15: 0.1 \\
\end{cases}
\hspace{1mm}
P(t_o|t_s',\open) = 
\begin{cases}
 (t_o>t_s'-5) \wedge (t_o<t_s'+5) &: 0.1 \\
 (t_o<t_s'-5) \wedge (t_o<t_s'+5)) &: 0 \\
\end{cases}\label{eq:obs} 
\end{align} 
\end{itemize}
\vspace{-1em}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Case statements and operators}
{\color{black} \bf Support for Unary and Binary operations $c\cdot f$ , $-f$, , $\oplus$,$\ominus$, $\otimes$} 
\vspace{-4mm}
{\footnotesize 
\begin{center}
\begin{tabular}{r c c c l}
&
\hspace{-6mm} 
  $\begin{cases}
    \phi_1: & f_1 \\ 
    \phi_2: & f_2 \\ 
  \end{cases}$
$\oplus$
&
\hspace{-4mm}
  $\begin{cases}
    \psi_1: & g_1 \\ 
    \psi_2: & g_2 \\ 
  \end{cases}$
&
\hspace{-2mm} 
$ = $
&
\hspace{-2mm}
  $\begin{cases}
  \phi_1 \wedge \psi_1: & f_1 + g_1 \\ 
  \phi_1 \wedge \psi_2: & f_1 + g_2 \\ 
  \phi_2 \wedge \psi_1: & f_2 + g_1 \\ 
  \phi_2 \wedge \psi_2: & f_2 + g_2 \\ 
  \end{cases}$
\end{tabular}
\end{center}
}
\vspace{-6mm}
{\color{black} \bf Support for Symbolic Maximization ($\mathrm{max}(f_1,f_2)$) , Integration ($\int_{x_1} f dx_1$), Substitution ($f\sigma$) and Restriction ($f|_{\phi}$)}

\end{pcolumn}
%
\hfill
%
\begin{pcolumn}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%\section{Optimization}
\section{Theoretical Contribution}
\vspace{-2em}

\subsection{PBVI for Continuous state, Discrete observation}
%{\bf \color{red} 
{Implementing PBVI using SDP operations on case statements for horizon $h$:} 
\vspace{-.5em}
\begin{center}
\shadowbox{
\begin{minipage}{0.9\columnwidth}\color{nictablue}
%\footnotetext{ Note that $\int f(x'_j) \otimes \delta[x_j' - h(\vec{z})] dx'_j = f(x'_j) \{ x'_j / h(\vec{z}) \}$ where the latter operation indicates that any occurrence of $x'_j$ in $f(x'_j)$ is \emph{symbolically substituted} with the case statement $h(\vec{z})$. }
\begin{align*}
&\mbox{\color{black} For all beliefs $b_i$ in $B$, and all $a_k$ in $A$}\\
& \hspace{12mm} \mbox{\color{black} For all $o$ in $O_i^h$, and all $\alpha_j^{h-1}$ in $\Gamma_{PBVI}^{h-1}$}\\
& \hspace{20mm} \alpha_j^{h'-1}= Prime(\alpha_j^{h-1}) \;\; \mbox{\color{black} [$\forall$ $d_i \to d_i'$ and $\forall x_i \to x_i'$]} \nonumber\\
& \hspace{20mm} g^h_{a,o,j} =  \int_{x_{s'}} \large \bigoplus_{d_{s'}} p(o|xd'_s,a) \otimes p(xd'_s|xd_s,a) \otimes \alpha^h_j(xd'_s) \;\; \mbox{\color{black} \footnotesize{[Marginal Integration and Summation]}}\nonumber \\
&\hspace{12mm} \mbox{\color{black} Perform(3) and Prune($\Gamma^h$)} \\
&\mbox{\color{black} For each beliefs $b_i$ in $B$}\\
& \hspace{12mm} \alpha_{b_i}^h :=\ \argmax_{\alpha_j \in \Gamma^h} \alpha_j \cdot b_i   \hspace{10mm} , \hspace{10mm} \Gamma_{PBVI}^h :=\ \Gamma_{PBVI}^h \cup \alpha_{b_i}^h \nonumber\\
\end{align*}
\vspace{-1.7em}
\end{minipage}
}
\end{center}
\vspace{-.5em}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

\subsection{PBVI for Continuous state, Continuous observation }
PBVI in (2) requires partitioning of the observation space such that $P(o|xd'_s,a)$ is defined. 
The relevant observations and their probability is defined for a belief $b_i$ as the following: 
\vspace{-1.3em}
\begin{center}
\shadowbox{
\begin{minipage}{0.9\columnwidth}\color{nictablue}
\begin{align*}
&\mbox{\color{black} I- For a belief $b_i$ and For all $\alpha_j$ in  $\Gamma^{h-1}$ perform point-based backup:}\\
& \hspace{9mm} \alpha_j(xd_s,xd_o) := \int_{x_s'} \bigoplus_{d_s'} P(xd_{o}|xd'_{s},a) \cdot P(xd'_{s}| xd_{s},a)\cdot \alpha_j \hspace{1mm}dx_s' \\
&\mbox{\color{black} II- For all $j$ in $\alpha_j(xd_s,xd_o)$ generate function of observations for belief $b_i$:}\\
& \hspace{9mm} \delta_{j}(xd_o) := \int_{x_{s}} \bigoplus_{d_s} b_i \cdot \alpha_j(xd_s,xd_o)\hspace{1mm} dx_s\\ 
&\mbox{\color{black} III- For all $j$ in $\delta_{j}(xd_o)$ generate partition points on the observation space:}\\
& \hspace{9mm} O := max(\delta_j(xd_o),\delta_{j+1}(xd_o)) \;\; \mbox{\color{red} [$\phi_{o_k}$ logical term on each partition of $O$]}\\
&\mbox{\color{black} IV- Generate probabilities that distinguish optimal $\alpha$-vectors::}\\
& \hspace{9mm} P(O=o_k|b_i) := \int_{x_s'} \int_{x_o} \int_{x_s} \bigoplus_{d_o} \bigoplus_{d_s} \bigoplus_{d_s'} P(xd_o|xd'_s,a)*P(xd'_s|xd_s,a)*b_i* \mathbb{I}[\phi_{o_k}] d_{x_o} d_{x_s}d_{x_s'}\\
\end{align*}
\vspace{-1.9em}
\end{minipage}
}
\end{center}
\vspace{-.5em}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\subsection{Step through Continuous observation Algorithm}
For a uniform belief $b_2: U[t_s;6,11]$ using the example in (4) and (5): 
%\vspace{-.5em}
\begin{align*}
 \hspace{2mm} 
{\footnotesize
\alpha_{close}^2(t_s,t_o) = 
\begin{cases}
 (t_s<15)\wedge (t_s - 10 < t_o<t_s): 10 \\
(t_s>15)\wedge (t_s - 10 < t_o<t_s): -100  \\
(t_s - 10 > t_o<t_s): 0
\end{cases}
\hspace{4mm} 
\alpha_{open}^2(t_s,t_o) = \begin{cases}
(t_s - 10 < t_o<t_s): -0.1 \\
(t_s - 10 > t_o<t_s): 0
\end{cases} }\;\; \mbox{\color{black} [I]}\\
\end{align*}
Observation dependent $\delta$-functions divide the observation space into regions
\begin{align*}
{\footnotesize
\delta_{close}^{b_2}(t_o) = 
\begin{cases}
 (14 < t_o< 18): 0.025*t_o - 0.45\\
 (8 < t_o< 14):  - 0.1\\
 (4 < t_o< 8): - 0.025*t_o -0.1\\
\end{cases}
\hspace{4mm} 
\delta_{open}^{b_2}(t_o) = \begin{cases}
 (15 < t_o< 18): 25*t_o - 450\\
 (14 < t_o< 15): - 2.5*t_o - 37.5\\
 (8 < t_o< 14):  -72.5\\
 (5 < t_o< 8): - 25*t_o + 127.5\\
 (4 < t_o< 5):  2.5*t_o - 10\\
\end{cases}} \hspace{0.7em} \;\; \mbox{\color{black} [II]}\\
\end{align*}
\end{pcolumn}
%
\hfill
%
\begin{pcolumn}
\vspace{.5em}
\subsection{Step through Continuous observation Algorithm (cont'd)}
Find boundary regions where each $\delta$-function is optimal using maximization: 
\begin{align*}
{\footnotesize
\mathrm{max} \Bigg(\delta_{close}^{b_2}(t_o),\delta_{open}^{b_2}(t_o)\Bigg) = 
\begin{cases}
o_1: (14 < t_o< 18) &: 0.025*t_o - 0.45\\
o_1: (8 < t_o< 14) &:  -0.1\\
o_1: (5.1 < t_o< 8) &: - 0.025*t_o -0.1\\
o_2: (5 < t_o< 5.1) &: - 25*t_o + 127.5\\
o_2: (4 < t_o< 5) &:  2.5*t_o - 10\\
\end{cases}} \hspace{0.9em} \;\; \mbox{\color{black} [III]}\\
\end{align*}
Multiply the observation partition indicators to obtain the probability mass: 
\begin{align*}
{\footnotesize
P(o_k|b_2) = 
\begin{cases}
(\phi_1 : 14 < t_o< 18) & : 0.2\\
(\phi_2 : 8 < t_o< 14) & :  0.6\\
(\phi_3 : 5.1 < t_o< 8) & : 0.183\\
(\phi_4 : 5 < t_o< 5.1) & : 0.0002\\
(\phi_5 : 4 < t_o< 5) & :  0.0125\\
\end{cases} , \hspace{0.7em}
\begin{cases}
o_1&: \phi_1 \vee \phi_2 \vee \phi_3 \longrightarrow P(o_1|b_2)=0.983 \\
o_2&: \phi_4 \vee \phi_5  \longrightarrow P(o_2|b_2)=0.0127
\end{cases}}\hspace{0.5em}
\;\; \mbox{\color{black} [IV]}\\ 
\vspace{-0.9em}
\end{align*}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Empirical Results}
{\bf \color{red} Power Plant} : First Exact closed-form solution for safe 1D ( temperature control, shown above) and 2D ( temperature and pressure control) problems.\\
{\color{red}Objective:} Produce electricity through steam production when pressure (p) and temperature (t) are within safe ranges.
Mixing water and steam makes the respective pressure and temperature observations highly uncertain.

\vspace{1mm}
\includegraphics[width=0.3\textwidth]{time2.pdf}
\includegraphics[width=0.3\textwidth]{nodes2.pdf}
\includegraphics[width=0.3\textwidth]{alpha-vectors2.pdf}\vspace{-8mm}
\vspace{-3mm}
\hspace{70mm}
\\
The above compares time, space and number of $\alpha$-vectors for each horizon of the 2D problem.\\
\vspace{-8mm}
\includegraphics[width=0.31\textwidth]{2d1-4.pdf}
\includegraphics[width=0.31\textwidth]{2d9-4.pdf}
\includegraphics[width=0.31\textwidth]{2d111-4.pdf}
{\it (left)}Maximum $\delta$-vector for $b_1$ and action of $close$ valve in first iteration; 
{\it (center)} $\delta_{close}^5(b_1)$; {\it (right)} $\delta_{close}^6(b_1)$. Finer grain partitions show that closing (or opening) the valve can occur at more exact temperatures at higher horizons. 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Summary}
\vspace{-1.7em}
\begin{itemize}
\item {\color{red} \bf Continuous State and Observation PBVI algorithm} using symbolic solutions.
\item {\color{red} \bf First exact solution} to multi-variate continuous state and observation POMDPs, discrete noise and piecewise linear dynamics.
\end{itemize}

\end{pcolumn}}

\notes{}

\begin{document}
\makeposter
\end{document}
